from pyspark import SparkContext
import os
import shutil

# Create the driver-side Spark entry point using the default configuration.
sc = SparkContext()

# Spread the integers 0..4 over 3 partitions. glom() turns each partition into
# a list, so collect() shows how the elements are laid out across partitions.
rdd = sc.parallelize(range(5), numSlices=3)
print('origin : ', rdd.glom().collect())

# map: apply the function to every element individually (here, double it).
rdd = rdd.map(lambda element: 2 * element)
print('map : ', rdd.glom().collect())

# mapPartitions
def funcPart(iterator):
    """Yield exactly one value: the sum of every element in ``iterator``.

    This is a generator, so nothing is consumed from ``iterator`` until the
    result is iterated. An empty ``iterator`` yields ``0``.
    """
    partition_total = sum(iterator)
    yield partition_total


# Hand each partition's iterator to funcPart, which yields one value (the sum
# of that partition), so every partition collapses to a single element.
rdd = rdd.mapPartitions(funcPart)
# glom() lists the contents of each partition so the per-partition result is visible.
print('mapPartitions : ', rdd.glom().collect())


# mapPartitionsWithIndex
def funPartIndex(idx, iterator):
    """Yield exactly one ``(idx, total)`` tuple for the given iterator.

    ``idx`` is passed through unchanged and ``total`` is the sum of every
    element in ``iterator`` (``0`` when it is empty).
    """
    total = sum(iterator)
    pair = (idx, total)
    yield pair


# Call funPartIndex with each partition's index and iterator; it yields one
# (index, sum) tuple per partition. rdd was rebound by the mapPartitions step
# above, so the sum here is taken over that step's output.
rdd = rdd.mapPartitionsWithIndex(funPartIndex)
print('mapPartitionsWithIndex : ', rdd.glom().collect())
